/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.io;
import java.io.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
/**
 * Static helpers for reading and writing byte arrays, strings and string
 * arrays on {@link DataInput} / {@link DataOutput} streams, optionally
 * gzip-compressed.
 *
 * Every value is framed as a 4-byte big-endian length (as written by
 * {@link DataOutput#writeInt(int)}) followed by that many bytes. Strings are
 * always encoded as UTF-8.
 */
public final class WritableUtils {

  /** Size of the scratch buffer used while inflating gzip data. */
  private static final int INFLATE_BUFFER_SIZE = 4096;

  /**
   * Rejects a negative length prefix. Without this check a corrupt stream
   * would surface as an unchecked NegativeArraySizeException instead of the
   * IOException callers of the read methods are prepared for.
   */
  private static int checkLength(int length) throws IOException {
    if (length < 0) {
      throw new IOException("Negative length prefix in stream: " + length);
    }
    return length;
  }

  /**
   * Reads a gzip-compressed byte array written by
   * {@link #writeCompressedByteArray(DataOutput, byte[])}.
   *
   * @param in the stream to read from
   * @return the decompressed bytes
   * @throws IOException if the stream ends early, the length prefix is
   *         negative, or the payload is not valid gzip data
   */
  public static byte[] readCompressedByteArray(DataInput in) throws IOException {
    int length = checkLength(in.readInt());
    byte[] compressed = new byte[length];
    in.readFully(compressed);

    GZIPInputStream gzi =
        new GZIPInputStream(new ByteArrayInputStream(compressed, 0, compressed.length));
    try {
      ByteArrayOutputStream bos = new ByteArrayOutputStream();
      // The decompressed size is unrelated to the compressed size, so use a
      // fixed-size scratch buffer rather than one sized from the prefix.
      byte[] chunk = new byte[INFLATE_BUFFER_SIZE];
      int len;
      while ((len = gzi.read(chunk, 0, chunk.length)) != -1) {
        bos.write(chunk, 0, len);
      }
      return bos.toByteArray();
    } finally {
      // Always release the native Inflater, even when the data is corrupt.
      gzi.close();
    }
  }

  /**
   * Gzip-compresses <code>bytes</code> and writes the result as a 4-byte
   * length followed by the compressed data.
   *
   * @param out the stream to write to
   * @param bytes the data to compress; must not be null
   * @return the compressed size as a percentage of the original size, or 0
   *         when <code>bytes</code> is empty (intended for debugging)
   * @throws IOException if compression or writing fails
   */
  public static int writeCompressedByteArray(DataOutput out, byte[] bytes) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    GZIPOutputStream gzout = new GZIPOutputStream(bos);
    try {
      gzout.write(bytes, 0, bytes.length);
    } finally {
      // close() also finishes the gzip trailer and releases the Deflater.
      gzout.close();
    }
    byte[] buffer = bos.toByteArray();
    int len = buffer.length;
    out.writeInt(len);
    out.write(buffer, 0, len);
    // Widen to long: 100 * len overflows int once len exceeds ~21 MB.
    return (bytes.length != 0) ? (int) ((100L * len) / bytes.length) : 0;
  }

  /**
   * Reads a string written by
   * {@link #writeCompressedString(DataOutput, String)}.
   *
   * @param in the stream to read from
   * @return the decoded string
   * @throws IOException if the underlying compressed read fails
   */
  public static String readCompressedString(DataInput in) throws IOException {
    return new String(readCompressedByteArray(in), "UTF-8");
  }

  /**
   * Writes the UTF-8 encoding of <code>s</code> as a gzip-compressed byte array.
   *
   * @param out the stream to write to
   * @param s the string to write; must not be null
   * @return the compression ratio reported by
   *         {@link #writeCompressedByteArray(DataOutput, byte[])}
   * @throws IOException if compression or writing fails
   */
  public static int writeCompressedString(DataOutput out, String s) throws IOException {
    return writeCompressedByteArray(out, s.getBytes("UTF-8"));
  }

  /**
   * Writes a string as a 4-byte length n followed by n bytes of UTF-8.
   * An alternative to {@link DataOutput#writeUTF(String)}, which is limited
   * to a 16-bit length.
   *
   * @param out the stream to write to
   * @param s the string to write; must not be null
   * @throws IOException if writing fails
   */
  public static void writeString(DataOutput out, String s) throws IOException {
    byte[] buffer = s.getBytes("UTF-8");
    int len = buffer.length;
    out.writeInt(len);
    out.write(buffer, 0, len);
  }

  /**
   * Reads a string written by {@link #writeString(DataOutput, String)}:
   * a 4-byte length n followed by n bytes of UTF-8.
   *
   * @param in the stream to read from
   * @return the decoded string
   * @throws IOException if the stream ends early or the length prefix is
   *         negative
   */
  public static String readString(DataInput in) throws IOException {
    int length = checkLength(in.readInt());
    byte[] buffer = new byte[length];
    in.readFully(buffer);
    return new String(buffer, "UTF-8");
  }

  /**
   * Writes a string array as a 4-byte element count N followed by N strings,
   * each in the format of {@link #writeString(DataOutput, String)}.
   *
   * @param out the stream to write to
   * @param s the strings to write; neither the array nor its elements may be
   *        null
   * @throws IOException if writing fails
   */
  public static void writeStringArray(DataOutput out, String[] s) throws IOException {
    out.writeInt(s.length);
    for (int i = 0; i < s.length; i++) {
      writeString(out, s[i]);
    }
  }

  /**
   * Reads a string array written by
   * {@link #writeStringArray(DataOutput, String[])}: a 4-byte element count N
   * followed by N strings.
   *
   * @param in the stream to read from
   * @return the decoded strings
   * @throws IOException if the stream ends early or a count/length prefix is
   *         negative
   */
  public static String[] readStringArray(DataInput in) throws IOException {
    int len = checkLength(in.readInt());
    String[] s = new String[len];
    for (int i = 0; i < len; i++) {
      s[i] = readString(in);
    }
    return s;
  }

  /**
   * Test utility: prints a byte array to standard output as comma-separated
   * two-digit lowercase hex values, starting a new line before every 16th
   * value except the last one in the array, and ending with a line break.
   * An empty array prints only the line break.
   *
   * @param record the bytes to display; must not be null
   */
  public static void displayByteArray(byte[] record) {
    if (record.length == 0) {
      // Nothing to print; the unguarded trailing-element code below would
      // otherwise index record[0] and throw.
      System.out.println();
      return;
    }
    int i;
    for (i = 0; i < record.length - 1; i++) {
      if (i % 16 == 0) { System.out.println(); }
      System.out.print(Integer.toHexString(record[i] >> 4 & 0x0F));
      System.out.print(Integer.toHexString(record[i] & 0x0F));
      System.out.print(",");
    }
    // Last element is printed without a trailing comma.
    System.out.print(Integer.toHexString(record[i] >> 4 & 0x0F));
    System.out.print(Integer.toHexString(record[i] & 0x0F));
    System.out.println();
  }
}